This script serves as a playing ground to figure out which variables serve best to build a good model, to predict our dependend variable.
remove(list = ls())
library(reactable)
library(sf)
library(data.table)
library(tidyverse)
library(corrplot)
library(reshape2)
library(igraph)
library(psych)
library(openxlsx)sample <- readRDS("../data_raw/samples/sample_for_model_building.RDS")
ncol(sample) ### abzügzlich der 5 nicht inhaltlichen Variablen## [1] 164
## [1] 2260 164
sample <- sample %>% as.data.frame()
### delete administrative variables
tmp_variable <- c("i00","idep","iprov","imun","ID_mun","ID_prov")
sample <- sample[,!colnames(sample) %in% tmp_variable]we have 164 variables, minus the 5 “administrative” ones results in 159 variables
filter for variables which have lots of NAs
dat_table_nas <- tibble(
"colnames" = colnames(sample),
"pecentageNAs" = sapply(
sample,
FUN = function(x)
round(mean(is.na(x)), 3)
)
)
hist(dat_table_nas$pecentageNAs,breaks = 40)because we need values to actually contain information we will only take a look at the variables, that have at least 50% not NA
## [1] 41
In the next step i will run just a single model to get a rough idea of how well two variables are connected and could be used for prediction
Because of simplicity I will just ran a numeric and factorial model for each variable. I will exclude the variables which have way to many levels (over 500). This loop either jumps the variable, if the variable only has 1 level or over 500.
list_variables <- dat_selection$colnames
dat_ko_variables <- data_frame(
"variable" = character(),
"r_squared_num" = character(),
"p_value_num" = character(),
"r_squared_factor" = character(),
"p_value_factor" = character(),
"n_fac_lelves" = character(),
"min" = character(),
"qnt_1st" = character(),
"median" = character(),
"mean" = character(),
"qnt_3rd" = character(),
"max" = character(),
"NA_percentage" = character()
)## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
for (i in 1:length(list_variables)) {
tmp_object <- list_variables[i]
tmp_variable_num <- sample[[tmp_object]]
tmp_variable_fac <- tmp_variable_num %>% as.factor()
tmp_length_variable <- table(tmp_variable_fac) %>% length()
print(paste0("now doing number: ", i))
### dont run model if more then 15 factor
if (tmp_length_variable > 500) {
tmp_data <- data_frame(
"variable" = tmp_object,
"r_squared_num" = NA,
"p_value_num" = NA,
"r_squared_factor" = NA,
"p_value_factor" = NA,
"n_fac_lelves" = tmp_length_variable,
"min" = summary(tmp_variable_num)[1],
"qnt_1st" = summary(tmp_variable_num)[2],
"median" = summary(tmp_variable_num)[3],
"mean" = summary(tmp_variable_num)[4],
"qnt_3rd" = summary(tmp_variable_num)[5],
"max" = summary(tmp_variable_num)[6],
"NA_percentage" = mean(is.na(tmp_variable_num))
)
dat_ko_variables <- rbind(dat_ko_variables, tmp_data)
print("early exit, too many")
next
} else if (tmp_length_variable <= 1) {
tmp_data <- data_frame(
"variable" = tmp_object,
"r_squared_num" = NA,
"p_value_num" = NA,
"r_squared_factor" = NA,
"p_value_factor" = NA,
"n_fac_lelves" = tmp_length_variable,
"min" = summary(tmp_variable_num)[1],
"qnt_1st" = summary(tmp_variable_num)[2],
"median" = summary(tmp_variable_num)[3],
"mean" = summary(tmp_variable_num)[4],
"qnt_3rd" = summary(tmp_variable_num)[5],
"max" = summary(tmp_variable_num)[6],
"NA_percentage" = mean(is.na(tmp_variable_num))
)
dat_ko_variables <- rbind(dat_ko_variables, tmp_data)
print("early exit, one or less")
next
} else {
formula <- as.formula(paste("aestudio ~", tmp_object))
model_num <- lm(sample$aestudio ~ tmp_variable_num)
model_fac <- lm(sample$aestudio ~ tmp_variable_fac)
r_squared_num <- summary(model_num)$r.squared
p_value_num <- summary(model_num)$coefficients[1, 4]
r_squared_factor <- summary(model_fac)$r.squared
p_value_factor <- summary(model_fac)$coefficients[1, 4]
tmp_data <- data_frame(
"variable" = tmp_object,
"r_squared_num" = r_squared_num,
"p_value_num" = p_value_num,
"r_squared_factor" = r_squared_factor,
"p_value_factor" = p_value_factor,
"n_fac_lelves" = tmp_length_variable,
"min" = summary(tmp_variable_num)[1],
"qnt_1st" = summary(tmp_variable_num)[2],
"median" = summary(tmp_variable_num)[3],
"mean" = summary(tmp_variable_num)[4],
"qnt_3rd" = summary(tmp_variable_num)[5],
"max" = summary(tmp_variable_num)[6],
"NA_percentage" = mean(is.na(tmp_variable_num))
)
dat_ko_variables <- rbind(dat_ko_variables, tmp_data)
}
}## [1] "now doing number: 1"
## [1] "now doing number: 2"
## [1] "now doing number: 3"
## [1] "now doing number: 4"
## [1] "now doing number: 5"
## [1] "now doing number: 6"
## [1] "now doing number: 7"
## [1] "now doing number: 8"
## [1] "now doing number: 9"
## [1] "now doing number: 10"
## [1] "now doing number: 11"
## [1] "now doing number: 12"
## [1] "now doing number: 13"
## [1] "now doing number: 14"
## [1] "now doing number: 15"
## [1] "now doing number: 16"
## [1] "now doing number: 17"
## [1] "now doing number: 18"
## [1] "now doing number: 19"
## [1] "now doing number: 20"
## [1] "now doing number: 21"
## [1] "now doing number: 22"
## [1] "now doing number: 23"
## [1] "now doing number: 24"
## [1] "now doing number: 25"
## [1] "now doing number: 26"
## [1] "now doing number: 27"
## [1] "now doing number: 28"
## [1] "now doing number: 29"
## [1] "now doing number: 30"
## [1] "now doing number: 31"
## [1] "now doing number: 32"
## [1] "now doing number: 33"
## [1] "now doing number: 34"
## [1] "now doing number: 35"
## [1] "now doing number: 36"
## [1] "now doing number: 37"
## [1] "now doing number: 38"
## [1] "now doing number: 39"
## [1] "now doing number: 40"
## [1] "now doing number: 41"
## [1] "now doing number: 42"
## [1] "now doing number: 43"
## [1] "now doing number: 44"
## [1] "now doing number: 45"
## [1] "now doing number: 46"
## [1] "now doing number: 47"
## [1] "now doing number: 48"
## Warning in summary.lm(model_num): essentially perfect fit: summary may be
## unreliable
## Warning in summary.lm(model_num): essentially perfect fit: summary may be
## unreliable
## [1] "now doing number: 49"
## [1] "now doing number: 50"
## [1] "now doing number: 51"
## [1] "now doing number: 52"
## [1] "now doing number: 53"
## [1] "now doing number: 54"
## [1] "now doing number: 55"
## [1] "now doing number: 56"
## [1] "now doing number: 57"
## [1] "now doing number: 58"
## [1] "now doing number: 59"
## [1] "now doing number: 60"
## [1] "now doing number: 61"
## [1] "now doing number: 62"
## [1] "now doing number: 63"
## [1] "now doing number: 64"
## [1] "now doing number: 65"
## [1] "now doing number: 66"
## [1] "now doing number: 67"
## [1] "now doing number: 68"
## [1] "now doing number: 69"
## [1] "now doing number: 70"
## [1] "now doing number: 71"
## [1] "now doing number: 72"
## [1] "now doing number: 73"
## [1] "now doing number: 74"
## [1] "now doing number: 75"
## [1] "now doing number: 76"
## [1] "now doing number: 77"
## [1] "now doing number: 78"
## [1] "early exit, one or less"
## [1] "now doing number: 79"
## [1] "now doing number: 80"
## [1] "now doing number: 81"
## [1] "now doing number: 82"
## [1] "now doing number: 83"
## [1] "now doing number: 84"
## [1] "now doing number: 85"
## [1] "now doing number: 86"
## [1] "now doing number: 87"
## [1] "now doing number: 88"
## [1] "now doing number: 89"
## [1] "now doing number: 90"
## [1] "now doing number: 91"
## [1] "now doing number: 92"
## [1] "now doing number: 93"
## [1] "now doing number: 94"
## [1] "now doing number: 95"
## [1] "now doing number: 96"
## [1] "now doing number: 97"
## [1] "now doing number: 98"
## [1] "now doing number: 99"
## [1] "now doing number: 100"
## [1] "now doing number: 101"
## [1] "now doing number: 102"
## [1] "now doing number: 103"
## [1] "now doing number: 104"
## [1] "now doing number: 105"
## [1] "now doing number: 106"
## [1] "now doing number: 107"
## [1] "now doing number: 108"
## [1] "now doing number: 109"
## [1] "now doing number: 110"
## [1] "now doing number: 111"
## [1] "now doing number: 112"
## [1] "now doing number: 113"
## [1] "now doing number: 114"
## [1] "now doing number: 115"
## [1] "now doing number: 116"
## [1] "now doing number: 117"
## convert column to numeric
dat_ko_variables$mean <- dat_ko_variables$mean %>% as.numeric()
### round variables to 3 digits after 0
dat_ko_variables <- dat_ko_variables %>%
mutate(across(where(is.numeric), ~ round(.x, 3)))
dat_ko_variables %>% reactable(.,compact = T, filterable = T,defaultPageSize = 20)## remove aestudio
dat_ko_variables_selection <- dat_ko_variables %>% dplyr::filter(variable != "aestudio")
### remove variables with high fac levels
(dat_ko_variables_selection$n_fac_lelves >= 15) %>% sum()## [1] 24
dat_ko_variables_selection <- dat_ko_variables_selection[dat_ko_variables_selection$n_fac_lelves < 15,]
# ### remove variables with high NA-ratio
# (dat_ko_variables_selection$NA_percentage > .25) %>% sum()
# dat_ko_variables_selection <- dat_ko_variables_selection[dat_ko_variables_selection$NA_percentage < .25,]
### remove variables, which are too close conceptually
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in% c("p41a_nivel","p41b_curso","p41a_nivel_act","p41b_curso_act","nivel_edu","p38_asiste", "asiste"))## .
## FALSE
## 2260
sample <- sample %>% as.data.frame()
### filter with factors over 15
idx <- sapply(sample,FUN = function(x) length(unique(x)) > 15)
idx %>% table()## .
## FALSE TRUE
## 126 32
sample2 <- sample[, !idx]
### filter with na percentage higher then .5
idx <- sapply(sample2,FUN = function(x) mean(is.na(x)) > .5)
idx %>% table()## .
## FALSE TRUE
## 93 33
sample2 <- sample2[, !idx]
### other non informative variables filter
sample2 <- sample2[,!colnames(sample2) %in% c("idep","imun")]
sample_fast <- data.frame(lapply(sample2, function(x) {
if (is.factor(x)) as.numeric(x) else x
}))
sample2## Loading required package: grid
tmp_data <- matrix(ncol = 3,nrow = 0) %>% as.data.frame()
colnames(tmp_data) <- c("v1","v2","cramresV")
for(i in seq_along(sample2)){
for(j in seq_along(sample2)){
x1 <- sample2[,i] %>% as.factor()
x2 <- sample2[,j] %>% as.factor()
tbl <- table(x1, x2)
tmp_vec <- c(colnames(sample2)[i],colnames(sample2)[j],assocstats(tbl)$cramer[[1]]) %>% unlist()
tmp_data <- rbind(tmp_data,tmp_vec)
}
}
colnames(tmp_data) <- c("v1","v2","cramresV")
This is a histogramm showing the distribution of the correlation between
all variables.
cram_mat <- tmp_data %>%
filter(v1 != v2) %>% # remove self-pairs
pivot_wider(
names_from = v2,
values_from = cramresV
) %>%
column_to_rownames("v1") %>%
as.matrix()
dim(cram_mat)## [1] 93 93
cram_long <- melt(cram_mat)
corr_matrix_2 <- ggplot(cram_long, aes(Var1, Var2, fill = value)) +
geom_tile() +
scale_fill_viridis_c() +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, size = 6),
axis.text.y = element_text(size = 6)
) +
labs(fill = "Cramér's V")
corr_matrix_2ggsave("../output/corr_matrix-2.png",plot = corr_matrix_2,width = 10,height = 6, device = "png",dpi = 300)dat_selection <- tmp_data
### drop the pair with itself
dat_selection <- dat_selection[dat_selection$v1 != dat_selection$v2,]
dat_selection <- dat_selection[!is.na(dat_selection$cramresV),]
dat_selection$cramresV %>% hist()### load in list from. A
list_variables <- readRDS("../data_raw/misc/list_variables_seleciton.RDS")
### filter for only these variables
dat_selection <- dat_selection[dat_selection$v1 %in% list_variables,]
dat_selection$pair_sorted <- apply(dat_selection[, c("v1","v2")], 1, function(x) paste(sort(x), collapse = "_"))
dat_selection_unique <- dat_selection[!duplicated(dat_selection$pair_sorted), ]
dat_selection_unique[,1:3] %>% reactable(.,compact = T,filterable = T)# dat_selection_unique: v1, v2, cramresV
threshold <- 0.75
# keep only strong pairs
strong_pairs <- dat_selection_unique %>% filter(cramresV > threshold)
# build graph
g <- graph_from_data_frame(strong_pairs[, c("v1", "v2")], directed = FALSE)
# find connected components
comp <- components(g)
# create a dataframe with groups
group_df <- data.frame(
var = names(comp$membership),
group = comp$membership
)
# optional: collapse variables in each group into one row
grouped_vars <- group_df %>%
group_by(group) %>%
summarise(vars = paste(var, collapse = ", "), .groups = "drop")
reactable(grouped_vars)## [1] 85
## [1] "p49_ocu_1d" "ocu_1d_19" "ocu_1d_13" "p40_lee"
## [5] "gedadedu" "g_edad_bol" "v19c_compu" "p42c_camina"
## [9] "g_edad" "p50_catocu_13" "p53_ecivil" "p42d_comuni"
## [13] "p50_semp" "v10_combus" "v11_basura" "v06_piso"
## [17] "v19e_inetfijo" "v16_desague" "v18j_lavadora" "v19e_f"
## [21] "p42b_oir" "p31_afiliado" "p45_agro" "urbrur"
## [25] "v19b_tv" "v19f_inetmovil" "p30b_caja" "v18f_refri"
## [29] "p42_discap" "p42a_ver" "v03_pared" "p43_pago"
## [33] "v18g_micro" "v15_servsan" "p32_pueblo_per" "v08_aguadist"
## [37] "p52_mov" "v19g_tvcable" "v04_revoq" "v17_tenencia"
## [41] "v18c_auto" "v09_energia" "v19d_celular" "v19h_d"
## [45] "dep_nac_cod" "dep_lab_cod" "v14_dormit" "v18i_aire"
## [49] "dep_res_cod" "v07_aguapro" "dep_res5_cod" "condact_19"
## [53] "v05_techo" "v18a_bici" "v18h_calefon" "tip_hog"
## [57] "p37_lugres5" "p25_sexo" "p30c_privad" "v13_habitac"
## [61] "v18b_moto" "v19a_radio" "v19h_telfijo" "p30d_atedom"
## [65] "p36_lugres" "v20a_emi" "p30f_autome" "p30e_tradic"
## [69] "p35_lugnac" "p44_nego" "p29_ci" "p30g_casera"
## [73] "v18d_carreta" "p28_cn" "p30a_public" "condact_13"
## [77] "pea_13" "v12_cocina" "v18e_bote" "v21a_fal"
## [81] "p31_cobersalud" "pet_19" "ft_19" "pet_13"
## [85] "v02_condocup"
### remove highly correlated items (9 gorups identified in script B)
# highly correlated with p28_cn,
tmp_variables <- "p29_ci"
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
# highly correlated with p30f_autome,
tmp_variables <- "p30g_casera"
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
# highly correlated with edad,
tmp_variables <- c("g_edad","g_edad_bol","gedadedu")
dat_ko_variables_selection$variable %in% tmp_variables %>% sum()## [1] 3
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
# highly correlated with pet_19,
tmp_variables <- c("condact_19","pet_13","pea_13","condact_13","ft_19") ## condact_13 already filtered out
tmp_variables %in% dat_ko_variables_selection$variable %>% sum()## [1] 5
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
# highly correlated with pet_19,
tmp_variables <- c("condact_19","pet_13","pea_13","condact_13","ft_19") ### all of them already filtered out, only pet_19 in
tmp_variables %in% dat_ko_variables_selection$variable %>% sum()## [1] 0
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
# highly correlated with ocu_1d_13,
tmp_variables <- c("p49_ocu_1d","ocu_1d_19") ### all already filtered out
tmp_variables %in% dat_ko_variables_selection$variable %>% sum()## [1] 2
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
# highly correlated with p50_catocu_13,
tmp_variables <- c("p50_semp") ### already filtered out
tmp_variables %in% dat_ko_variables_selection$variable %>% sum()## [1] 1
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
# highly correlated with v18f_refri,
tmp_variables <- c("v18g_micro","v18j_lavadora","v19c_compu","v19e_inetfijo","v19b_tv","v19g_tvcable")
tmp_variables %in% dat_ko_variables_selection$variable %>% sum()## [1] 6
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
# highly correlated with v19d_celular,
tmp_variables <- c("v19f_inetmovil","v19e_f","v19h_d")
tmp_variables %in% dat_ko_variables_selection$variable %>% sum()## [1] 3
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
### remove because of singularity
tmp_variables <- c("v02_condocup")
tmp_variables %in% dat_ko_variables_selection$variable %>% sum()## [1] 1
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
### remove municipality codes
tmp_variables <- c("dep_res_cod","dep_res5_cod","dep_lab_cod","dep_nac_cod") ### dep_lab_cod already out
tmp_variables %in% dat_ko_variables_selection$variable %>% sum()## [1] 4
dat_ko_variables_selection <- dat_ko_variables_selection %>% filter(!variable %in%
tmp_variables)
candidates <- dat_ko_variables_selection[,"variable"]
sample <- sample %>% as.data.frame()
sample_candidates <- sample[,c("aestudio",candidates$variable) %in% colnames(sample)]
formula <- paste("aestudio ~",paste(c("p26_edad",candidates$variable),collapse = " + " ))
formula <- formula %>% as.formula()# sample$p332_idiohab2_cod[is.na(sample$p332_idiohab2_cod)] <- "Missing"
### replace all NAs with Missing, so its its own factor
## becase sample_candidates[,candidates$variable] %>% na.omit()
## in a big shrinkage of the data frame
sample_candidates[,candidates$variable][is.na(sample_candidates[,candidates$variable])] <- "Missing"
## convert all of them to factors
for (col in candidates$variable) {
sample_candidates[[col]] <- factor(sample_candidates[[col]])
}
lm(formula = formula,data = sample_candidates) %>% summary()##
## Call:
## lm(formula = formula, data = sample_candidates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.8193 -2.3028 0.1004 2.2940 13.2958
##
## Coefficients: (43 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.2104219 3.9346049 4.882 1.13e-06 ***
## p26_edad -0.0944648 0.0067601 -13.974 < 2e-16 ***
## ocu_1d_131 -0.2818353 4.0358810 -0.070 0.944334
## ocu_1d_132 1.0557710 3.6409707 0.290 0.771868
## ocu_1d_133 -1.7523047 3.6671352 -0.478 0.632814
## ocu_1d_134 -2.5010378 3.6929435 -0.677 0.498324
## ocu_1d_135 -4.4857981 3.6333252 -1.235 0.217109
## ocu_1d_136 -5.7465548 3.6278550 -1.584 0.113344
## ocu_1d_137 -4.9174309 3.6302191 -1.355 0.175698
## ocu_1d_138 -5.2982827 3.6338236 -1.458 0.144979
## ocu_1d_139 -5.7192420 3.6728639 -1.557 0.119585
## ocu_1d_1397 -3.7254489 3.6805048 -1.012 0.311555
## ocu_1d_1399 -6.3276195 3.6890172 -1.715 0.086447 .
## ocu_1d_13Missing -4.6322148 3.6710459 -1.262 0.207154
## p40_lee2 -5.1112426 0.3433251 -14.887 < 2e-16 ***
## p40_lee9 -6.7071898 1.4655773 -4.576 5.01e-06 ***
## p42c_camina2 -0.4000825 0.2900252 -1.379 0.167897
## p42c_camina3 0.3148241 0.6605354 0.477 0.633683
## p42c_camina4 -0.8733877 1.2303573 -0.710 0.477868
## p42c_camina9 -0.5558672 1.4070368 -0.395 0.692837
## p50_catocu_132 -0.0799209 0.2586223 -0.309 0.757333
## p50_catocu_133 0.5420167 0.7340109 0.738 0.460336
## p50_catocu_134 0.3759010 0.3638362 1.033 0.301649
## p50_catocu_135 -1.3521822 1.0756911 -1.257 0.208882
## p50_catocu_136 -0.9553811 1.5438101 -0.619 0.536086
## p50_catocu_139 -0.0533627 0.3138500 -0.170 0.865006
## p50_catocu_13Missing NA NA NA NA
## p53_ecivil2 -0.3706055 0.2258943 -1.641 0.101029
## p53_ecivil3 -0.2817358 0.5035782 -0.559 0.575903
## p53_ecivil4 0.9757556 0.6939424 1.406 0.159843
## p53_ecivil5 -0.1037982 0.3801780 -0.273 0.784861
## p53_ecivil6 0.4380234 0.2368831 1.849 0.064584 .
## p53_ecivil9 -0.8644552 0.5480420 -1.577 0.114867
## p42d_comuni2 -0.7457360 0.2750420 -2.711 0.006756 **
## p42d_comuni3 -0.1140062 0.7646217 -0.149 0.881488
## p42d_comuni4 -0.3861464 1.4554750 -0.265 0.790800
## p42d_comuni9 0.2877857 1.3638270 0.211 0.832898
## v10_combus2 -0.1119280 0.3058863 -0.366 0.714467
## v10_combus3 -0.2860762 0.2284975 -1.252 0.210715
## v10_combus4 -1.1361343 0.8391952 -1.354 0.175935
## v10_combus6 1.8756787 3.5616813 0.527 0.598509
## v10_combus7 4.1596093 2.5741540 1.616 0.106265
## v10_combus8 -0.5630082 0.9342084 -0.603 0.546802
## v10_combusMissing -1.0976314 1.2800020 -0.858 0.391255
## v11_basura2 0.0110451 0.3076631 0.036 0.971366
## v11_basura3 -0.8874901 0.5178427 -1.714 0.086711 .
## v11_basura4 -0.4015246 0.5369580 -0.748 0.454680
## v11_basura5 -0.3825404 0.3351300 -1.141 0.253806
## v11_basura6 -0.0370118 0.4191637 -0.088 0.929647
## v11_basura7 0.6433218 0.8070238 0.797 0.425453
## v11_basuraMissing NA NA NA NA
## v06_piso2 1.5614058 0.5570684 2.803 0.005112 **
## v06_piso3 2.0704270 0.6363746 3.253 0.001158 **
## v06_piso4 0.6615894 0.3252521 2.034 0.042070 *
## v06_piso5 0.2450679 0.2378498 1.030 0.302967
## v06_piso6 0.3346494 0.8144418 0.411 0.681192
## v06_piso7 1.0352587 0.4493166 2.304 0.021317 *
## v06_piso8 0.9321456 1.3820962 0.674 0.500105
## v06_piso9 2.4671876 1.0286138 2.399 0.016548 *
## v06_pisoMissing NA NA NA NA
## v16_desague2 0.4588161 0.3495231 1.313 0.189432
## v16_desague3 -0.8201033 0.2561343 -3.202 0.001386 **
## v16_desague4 -0.7172989 0.5914831 -1.213 0.225378
## v16_desague5 -1.6533004 0.8462413 -1.954 0.050871 .
## v16_desague6 -0.8536275 0.7966089 -1.072 0.284035
## v16_desagueMissing -0.8706840 0.2985507 -2.916 0.003579 **
## p42b_oir2 -0.0014114 0.2873346 -0.005 0.996081
## p42b_oir3 -0.1241268 0.7151049 -0.174 0.862214
## p42b_oir4 -0.2764868 1.5749996 -0.176 0.860667
## p42b_oir9 -0.5553549 1.5909251 -0.349 0.727067
## p31_afiliado2 0.9077778 0.3437572 2.641 0.008334 **
## p31_afiliado3 2.6840713 1.0283822 2.610 0.009119 **
## p31_afiliado4 -0.2683604 0.2198853 -1.220 0.222431
## p31_afiliado9 0.4972290 0.7398978 0.672 0.501643
## p45_agro2 0.4890574 0.3936882 1.242 0.214286
## p45_agro9 0.7078342 1.1282783 0.627 0.530494
## p45_agroMissing 0.5668453 0.3097008 1.830 0.067348 .
## urbrur2 0.2250764 0.2404200 0.936 0.349289
## p30b_caja2 -0.7216982 0.2695941 -2.677 0.007487 **
## p30b_caja9 1.4770033 4.4005212 0.336 0.737174
## v18f_refri2 -0.0029766 0.2160107 -0.014 0.989007
## v18f_refri9 -3.1240721 6.5128212 -0.480 0.631505
## v18f_refriMissing NA NA NA NA
## p42_discap2 0.4957764 0.6329367 0.783 0.433543
## p42_discapMissing 0.3944657 1.4825118 0.266 0.790205
## p42a_ver2 0.0377055 0.2131570 0.177 0.859612
## p42a_ver3 -0.0332640 0.6417041 -0.052 0.958664
## p42a_ver4 1.4563954 1.1384931 1.279 0.200959
## p42a_ver9 0.8698975 1.7694169 0.492 0.623033
## v03_pared2 0.3489045 0.2219958 1.572 0.116179
## v03_pared3 0.2549566 1.1618921 0.219 0.826335
## v03_pared4 0.1664070 0.5472914 0.304 0.761116
## v03_pared5 -0.0582422 0.3774089 -0.154 0.877372
## v03_pared6 1.7531384 0.9774126 1.794 0.073014 .
## v03_pared7 0.9747964 0.7768404 1.255 0.209685
## v03_paredMissing NA NA NA NA
## p43_pago2 -0.0378334 0.2655563 -0.142 0.886724
## p43_pago9 NA NA NA NA
## v15_servsan2 0.1889236 0.2850345 0.663 0.507526
## v15_servsan3 NA NA NA NA
## v15_servsanMissing NA NA NA NA
## p32_pueblo_per2 0.1846702 0.1772236 1.042 0.297525
## p32_pueblo_per9 0.3671801 0.6590916 0.557 0.577519
## v08_aguadist2 -0.2672088 0.2160463 -1.237 0.216296
## v08_aguadist3 -0.0822314 0.3939978 -0.209 0.834695
## v08_aguadistMissing NA NA NA NA
## p52_mov2 -0.0214448 0.2420912 -0.089 0.929423
## p52_mov3 0.1892796 0.3657375 0.518 0.604842
## p52_mov4 -2.5862087 2.1604884 -1.197 0.231424
## p52_mov9 0.6561976 0.4523655 1.451 0.147045
## p52_movMissing -0.0806859 0.4105565 -0.197 0.844216
## v04_revoq2 -0.4707360 0.2215429 -2.125 0.033720 *
## v04_revoqMissing NA NA NA NA
## v17_tenencia2 -0.6237885 0.3977864 -1.568 0.116999
## v17_tenencia3 -0.3879971 0.3616342 -1.073 0.283441
## v17_tenencia4 -0.2712238 0.3614987 -0.750 0.453174
## v17_tenencia5 1.1752844 0.8376042 1.403 0.160722
## v17_tenencia6 -0.2275831 1.6141247 -0.141 0.887888
## v17_tenencia7 -0.6972228 0.6221163 -1.121 0.262533
## v17_tenencia8 -0.0001743 0.5874069 0.000 0.999763
## v17_tenenciaMissing NA NA NA NA
## v18c_auto2 -0.2297225 0.2063698 -1.113 0.265769
## v18c_auto9 NA NA NA NA
## v18c_autoMissing NA NA NA NA
## v09_energia2 0.0584269 0.7276295 0.080 0.936008
## v09_energia3 0.1503551 0.4353706 0.345 0.729866
## v09_energia4 -0.0039199 0.8196860 -0.005 0.996185
## v09_energia5 -0.2670975 0.2638030 -1.012 0.311422
## v09_energiaMissing NA NA NA NA
## v19d_celular2 -0.2760474 0.2508965 -1.100 0.271353
## v19d_celular9 -0.1234754 3.6787317 -0.034 0.973228
## v19d_celularMissing NA NA NA NA
## v14_dormit1 0.3608706 0.4471501 0.807 0.419733
## v14_dormit2 0.3365191 0.5028521 0.669 0.503429
## v14_dormit3 0.0806014 0.5503906 0.146 0.883585
## v14_dormit4 0.1154080 0.6434357 0.179 0.857671
## v14_dormit5 0.9615351 0.8519542 1.129 0.259187
## v14_dormit6 -0.5157838 1.1840459 -0.436 0.663164
## v14_dormit7 1.3640393 2.2616124 0.603 0.546490
## v14_dormit8 -1.1205601 1.8484363 -0.606 0.544435
## v14_dormitMissing NA NA NA NA
## v18i_aire2 -0.2890207 0.4779831 -0.605 0.545466
## v18i_aire9 NA NA NA NA
## v18i_aireMissing NA NA NA NA
## v07_aguapro2 -0.5844105 0.4551512 -1.284 0.199288
## v07_aguapro3 1.1948062 0.5332159 2.241 0.025147 *
## v07_aguapro4 -0.2091616 0.3737204 -0.560 0.575762
## v07_aguapro5 0.3498870 0.4454506 0.785 0.432269
## v07_aguapro6 0.3687814 0.4085629 0.903 0.366827
## v07_aguapro7 0.2763270 0.4356201 0.634 0.525935
## v07_aguapro8 -0.9033876 0.7224930 -1.250 0.211303
## v07_aguapro9 0.4640759 0.6815507 0.681 0.496003
## v07_aguaproMissing NA NA NA NA
## v05_techo2 -0.3292063 0.2207815 -1.491 0.136088
## v05_techo3 0.0729969 0.3959786 0.184 0.853760
## v05_techo4 -0.0425353 0.2776541 -0.153 0.878259
## v05_techo5 -0.3987052 0.6550317 -0.609 0.542803
## v05_techoMissing NA NA NA NA
## v18a_bici2 -0.4681932 0.1779664 -2.631 0.008581 **
## v18a_bici9 -3.5237041 7.4000349 -0.476 0.634001
## v18a_biciMissing NA NA NA NA
## v18h_calefon2 -0.8530397 0.4848019 -1.760 0.078629 .
## v18h_calefon9 NA NA NA NA
## v18h_calefonMissing NA NA NA NA
## tip_hog2 -0.0862737 0.3771540 -0.229 0.819086
## tip_hog3 -0.0663085 0.3036041 -0.218 0.827135
## tip_hog4 -0.2105191 0.2761963 -0.762 0.446022
## tip_hog5 0.2451188 0.2590300 0.946 0.344108
## tip_hog6 -0.2236652 0.4123510 -0.542 0.587591
## tip_hog7 0.0474900 1.8252988 0.026 0.979246
## tip_hogMissing NA NA NA NA
## p37_lugres52 0.6001530 0.2696144 2.226 0.026123 *
## p37_lugres53 0.6061429 0.8639428 0.702 0.483007
## p37_lugres59 0.1516095 0.8439972 0.180 0.857458
## p25_sexo2 0.6374010 0.1701282 3.747 0.000184 ***
## p30c_privad2 0.0064150 0.2218082 0.029 0.976930
## p30c_privad9 -2.4268429 3.9583784 -0.613 0.539884
## v13_habitac2 -0.4659830 0.2463923 -1.891 0.058734 .
## v13_habitac3 0.0222914 0.3113937 0.072 0.942938
## v13_habitac4 -0.1232138 0.3850329 -0.320 0.748994
## v13_habitac5 -0.2919635 0.4973574 -0.587 0.557248
## v13_habitac6 1.2394283 0.6443194 1.924 0.054538 .
## v13_habitac7 0.1539542 0.8390673 0.183 0.854437
## v13_habitac8 0.7395351 0.8350556 0.886 0.375929
## v13_habitacMissing NA NA NA NA
## v18b_moto2 0.1350868 0.1896116 0.712 0.476273
## v18b_moto9 NA NA NA NA
## v18b_motoMissing NA NA NA NA
## v19a_radio2 -0.1787940 0.1711935 -1.044 0.296423
## v19a_radio9 2.8176183 6.2539598 0.451 0.652373
## v19a_radioMissing NA NA NA NA
## v19h_telfijo2 -0.3588507 0.4478725 -0.801 0.423088
## v19h_telfijo9 -0.7875086 5.1307856 -0.153 0.878029
## v19h_telfijoMissing NA NA NA NA
## p30d_atedom2 0.3972084 0.2597692 1.529 0.126396
## p30d_atedom9 NA NA NA NA
## p36_lugres2 0.3201246 0.4008118 0.799 0.424561
## v20a_emi2 0.5480362 0.2744377 1.997 0.045962 *
## v20a_emiMissing NA NA NA NA
## p30f_autome2 -0.1795617 0.1637213 -1.097 0.272877
## p30f_autome9 NA NA NA NA
## p30e_tradic2 0.1567589 0.1721195 0.911 0.362529
## p30e_tradic9 NA NA NA NA
## p35_lugnac2 -0.1810258 0.1798978 -1.006 0.314403
## p35_lugnac3 0.6683294 0.9246367 0.723 0.469883
## p35_lugnac9 0.5102827 1.3209981 0.386 0.699325
## p44_nego2 NA NA NA NA
## p44_nego9 NA NA NA NA
## p44_negoMissing NA NA NA NA
## v18d_carreta2 0.0718181 0.3650621 0.197 0.844059
## v18d_carreta9 NA NA NA NA
## v18d_carretaMissing NA NA NA NA
## p28_cn2 -0.9838794 1.2524833 -0.786 0.432225
## p28_cn9 -0.6870736 2.7595043 -0.249 0.803397
## p30a_public2 -0.1852839 0.2141764 -0.865 0.387084
## p30a_public9 1.0773741 2.1579944 0.499 0.617658
## v12_cocina2 0.5270537 0.2050186 2.571 0.010217 *
## v12_cocinaMissing NA NA NA NA
## v18e_bote2 -0.4763776 0.5142472 -0.926 0.354367
## v18e_bote9 2.1666032 3.5973508 0.602 0.547055
## v18e_boteMissing NA NA NA NA
## v21a_fal2 0.2546112 0.2605269 0.977 0.328538
## v21a_falMissing NA NA NA NA
## p31_cobersalud2 NA NA NA NA
## p31_cobersaludMissing NA NA NA NA
## pet_199 -1.1060303 0.5637385 -1.962 0.049901 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.467 on 2077 degrees of freedom
## Multiple R-squared: 0.5806, Adjusted R-squared: 0.5438
## F-statistic: 15.8 on 182 and 2077 DF, p-value: < 2.2e-16
model_full <- lm(formula = formula,data = sample_candidates)
model_null <- lm(formula = aestudio ~ 1,data = sample_candidates)
# model_full %>% summary()
model_backward <- step(model_full,direction = "backward",trace = 0)
# model_backward %>% summary()model_forward <- step(model_null,scope = formula, direction = "forward", trace = 0)
# model_forward %>% summary()model_both <- step(model_null,scope = formula, direction = "both", trace = 0)
# model_both %>% summary()## lm(formula = aestudio ~ p26_edad + ocu_1d_13 + p40_lee + p53_ecivil +
## p42d_comuni + v06_piso + v16_desague + p31_afiliado + p30b_caja +
## v04_revoq + v18c_auto + v07_aguapro + v18a_bici + v18h_calefon +
## p37_lugres5 + p25_sexo + v20a_emi + v12_cocina + pet_19,
## data = sample_candidates)
## lm(formula = aestudio ~ p40_lee + ocu_1d_13 + p26_edad + v16_desague +
## p31_afiliado + v06_piso + p53_ecivil + p25_sexo + v04_revoq +
## v12_cocina + v18a_bici + p42d_comuni + p37_lugres5 + p30b_caja +
## pet_19 + v07_aguapro + v18c_auto + v18h_calefon + v20a_emi,
## data = sample_candidates)
## lm(formula = aestudio ~ p40_lee + ocu_1d_13 + p26_edad + v16_desague +
## p31_afiliado + v06_piso + p53_ecivil + p25_sexo + v04_revoq +
## v12_cocina + v18a_bici + p42d_comuni + p37_lugres5 + p30b_caja +
## pet_19 + v07_aguapro + v18c_auto + v18h_calefon + v20a_emi,
## data = sample_candidates)
model_backward_BIC <- step(model_full,direction = "backward",trace = 0, k = log(nrow(sample_candidates)))
# model_backward_BIC %>% summary()model_forward_BIC <- step(model_null,scope = formula, direction = "forward",trace = 0, k = log(nrow(sample_candidates)))
# model_forward_BIC %>% summary()model_both_BIC <- step(model_null,scope = formula, direction = "both", trace = 0,k = log(nrow(sample_candidates)))
# model_both_BIC %>% summary()## lm(formula = aestudio ~ p26_edad + ocu_1d_13 + p40_lee + v16_desague +
## p30b_caja + v04_revoq + p25_sexo, data = sample_candidates)
## lm(formula = aestudio ~ p40_lee + ocu_1d_13 + p26_edad + v16_desague +
## p31_afiliado + v04_revoq + p25_sexo + v12_cocina, data = sample_candidates)
## lm(formula = aestudio ~ p40_lee + ocu_1d_13 + p26_edad + v16_desague +
## p31_afiliado + v04_revoq + p25_sexo + v12_cocina, data = sample_candidates)
model_backward_BIC\(call %>% print() model_forward_BIC\)call %>% print() model_both_BIC$call %>% print()
##
## Call:
## lm(formula = aestudio ~ p26_edad + ocu_1d_13 + p40_lee + v16_desague +
## p30b_caja + v04_revoq + p25_sexo, data = sample_candidates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.932 -2.581 0.293 2.256 17.511
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.025162 3.585235 6.701 2.61e-11 ***
## p26_edad -0.102984 0.004832 -21.314 < 2e-16 ***
## ocu_1d_131 -2.959064 3.975656 -0.744 0.456776
## ocu_1d_132 -1.889711 3.576398 -0.528 0.597286
## ocu_1d_133 -4.836819 3.600891 -1.343 0.179334
## ocu_1d_134 -5.611807 3.625485 -1.548 0.121794
## ocu_1d_135 -7.815453 3.564550 -2.193 0.028443 *
## ocu_1d_136 -9.521052 3.559799 -2.675 0.007536 **
## ocu_1d_137 -8.461714 3.564479 -2.374 0.017685 *
## ocu_1d_138 -8.570574 3.571843 -2.399 0.016500 *
## ocu_1d_139 -9.741265 3.589283 -2.714 0.006699 **
## ocu_1d_1397 -6.961707 3.605236 -1.931 0.053610 .
## ocu_1d_1399 -10.090738 3.617356 -2.790 0.005323 **
## ocu_1d_13Missing -8.262454 3.562049 -2.320 0.020453 *
## p40_lee2 -5.811020 0.313822 -18.517 < 2e-16 ***
## p40_lee9 -7.321805 1.348128 -5.431 6.21e-08 ***
## v16_desague2 0.019360 0.326278 0.059 0.952690
## v16_desague3 -1.404211 0.213205 -6.586 5.61e-11 ***
## v16_desague4 -1.200082 0.547878 -2.190 0.028597 *
## v16_desague5 -2.246377 0.831129 -2.703 0.006928 **
## v16_desague6 -1.317273 0.750439 -1.755 0.079339 .
## v16_desagueMissing -1.259412 0.220337 -5.716 1.24e-08 ***
## p30b_caja2 -1.159032 0.233110 -4.972 7.13e-07 ***
## p30b_caja9 -0.549945 0.777060 -0.708 0.479190
## v04_revoq2 -0.780262 0.169905 -4.592 4.63e-06 ***
## v04_revoqMissing 0.652346 0.515976 1.264 0.206257
## p25_sexo2 0.602533 0.161599 3.729 0.000197 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.543 on 2233 degrees of freedom
## Multiple R-squared: 0.529, Adjusted R-squared: 0.5235
## F-statistic: 96.47 on 26 and 2233 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = aestudio ~ p40_lee + ocu_1d_13 + p26_edad + v16_desague +
## p31_afiliado + v04_revoq + p25_sexo + v12_cocina, data = sample_candidates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.9372 -2.5098 0.2672 2.2435 17.8286
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.718375 3.558611 6.384 2.09e-10 ***
## p40_lee2 -5.815999 0.312229 -18.627 < 2e-16 ***
## p40_lee9 -7.253293 1.341683 -5.406 7.13e-08 ***
## ocu_1d_131 -2.904840 3.958019 -0.734 0.46308
## ocu_1d_132 -1.850918 3.559916 -0.520 0.60316
## ocu_1d_133 -4.966381 3.584898 -1.385 0.16608
## ocu_1d_134 -5.514007 3.608393 -1.528 0.12663
## ocu_1d_135 -7.731044 3.548447 -2.179 0.02946 *
## ocu_1d_136 -9.434455 3.543657 -2.662 0.00782 **
## ocu_1d_137 -8.408634 3.548310 -2.370 0.01788 *
## ocu_1d_138 -8.556907 3.555635 -2.407 0.01618 *
## ocu_1d_139 -9.669341 3.572871 -2.706 0.00686 **
## ocu_1d_1397 -6.866536 3.588738 -1.913 0.05583 .
## ocu_1d_1399 -9.882508 3.601084 -2.744 0.00611 **
## ocu_1d_13Missing -8.219553 3.545954 -2.318 0.02054 *
## p26_edad -0.102137 0.004801 -21.272 < 2e-16 ***
## v16_desague2 0.077416 0.324817 0.238 0.81164
## v16_desague3 -1.411071 0.212120 -6.652 3.62e-11 ***
## v16_desague4 -1.197858 0.545128 -2.197 0.02810 *
## v16_desague5 -2.088753 0.827902 -2.523 0.01171 *
## v16_desague6 -1.397162 0.744947 -1.876 0.06085 .
## v16_desagueMissing -1.382140 0.222034 -6.225 5.74e-10 ***
## p31_afiliado2 1.518594 0.288538 5.263 1.55e-07 ***
## p31_afiliado3 3.118287 0.988771 3.154 0.00163 **
## p31_afiliado4 -0.218776 0.204459 -1.070 0.28472
## p31_afiliado9 0.691986 0.560125 1.235 0.21681
## v04_revoq2 -0.833052 0.172271 -4.836 1.42e-06 ***
## v04_revoqMissing 0.791849 0.517508 1.530 0.12613
## p25_sexo2 0.638829 0.161676 3.951 8.02e-05 ***
## v12_cocina2 0.508947 0.184122 2.764 0.00575 **
## v12_cocinaMissing NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.527 on 2230 degrees of freedom
## Multiple R-squared: 0.5339, Adjusted R-squared: 0.5279
## F-statistic: 88.1 on 29 and 2230 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = aestudio ~ p40_lee + ocu_1d_13 + p26_edad + v16_desague +
## p31_afiliado + v04_revoq + p25_sexo + v12_cocina, data = sample_candidates)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.9372 -2.5098 0.2672 2.2435 17.8286
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.718375 3.558611 6.384 2.09e-10 ***
## p40_lee2 -5.815999 0.312229 -18.627 < 2e-16 ***
## p40_lee9 -7.253293 1.341683 -5.406 7.13e-08 ***
## ocu_1d_131 -2.904840 3.958019 -0.734 0.46308
## ocu_1d_132 -1.850918 3.559916 -0.520 0.60316
## ocu_1d_133 -4.966381 3.584898 -1.385 0.16608
## ocu_1d_134 -5.514007 3.608393 -1.528 0.12663
## ocu_1d_135 -7.731044 3.548447 -2.179 0.02946 *
## ocu_1d_136 -9.434455 3.543657 -2.662 0.00782 **
## ocu_1d_137 -8.408634 3.548310 -2.370 0.01788 *
## ocu_1d_138 -8.556907 3.555635 -2.407 0.01618 *
## ocu_1d_139 -9.669341 3.572871 -2.706 0.00686 **
## ocu_1d_1397 -6.866536 3.588738 -1.913 0.05583 .
## ocu_1d_1399 -9.882508 3.601084 -2.744 0.00611 **
## ocu_1d_13Missing -8.219553 3.545954 -2.318 0.02054 *
## p26_edad -0.102137 0.004801 -21.272 < 2e-16 ***
## v16_desague2 0.077416 0.324817 0.238 0.81164
## v16_desague3 -1.411071 0.212120 -6.652 3.62e-11 ***
## v16_desague4 -1.197858 0.545128 -2.197 0.02810 *
## v16_desague5 -2.088753 0.827902 -2.523 0.01171 *
## v16_desague6 -1.397162 0.744947 -1.876 0.06085 .
## v16_desagueMissing -1.382140 0.222034 -6.225 5.74e-10 ***
## p31_afiliado2 1.518594 0.288538 5.263 1.55e-07 ***
## p31_afiliado3 3.118287 0.988771 3.154 0.00163 **
## p31_afiliado4 -0.218776 0.204459 -1.070 0.28472
## p31_afiliado9 0.691986 0.560125 1.235 0.21681
## v04_revoq2 -0.833052 0.172271 -4.836 1.42e-06 ***
## v04_revoqMissing 0.791849 0.517508 1.530 0.12613
## p25_sexo2 0.638829 0.161676 3.951 8.02e-05 ***
## v12_cocina2 0.508947 0.184122 2.764 0.00575 **
## v12_cocinaMissing NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.527 on 2230 degrees of freedom
## Multiple R-squared: 0.5339, Adjusted R-squared: 0.5279
## F-statistic: 88.1 on 29 and 2230 DF, p-value: < 2.2e-16
## covariats for the AIC
covariates <- names(model_both$coefficients)
covariates <- covariates[covariates != "(Intercept)"]
covariates %>% length()## [1] 75
## [1] "p40_lee2" "p40_lee9" "ocu_1d_131"
## [4] "ocu_1d_132" "ocu_1d_133" "ocu_1d_134"
## [7] "ocu_1d_135" "ocu_1d_136" "ocu_1d_137"
## [10] "ocu_1d_138" "ocu_1d_139" "ocu_1d_1397"
## [13] "ocu_1d_1399" "ocu_1d_13Missing" "p26_edad"
## [16] "v16_desague2" "v16_desague3" "v16_desague4"
## [19] "v16_desague5" "v16_desague6" "v16_desagueMissing"
## [22] "p31_afiliado2" "p31_afiliado3" "p31_afiliado4"
## [25] "p31_afiliado9" "v06_piso2" "v06_piso3"
## [28] "v06_piso4" "v06_piso5" "v06_piso6"
## [31] "v06_piso7" "v06_piso8" "v06_piso9"
## [34] "v06_pisoMissing" "p53_ecivil2" "p53_ecivil3"
## [37] "p53_ecivil4" "p53_ecivil5" "p53_ecivil6"
## [40] "p53_ecivil9" "p25_sexo2" "v04_revoq2"
## [43] "v04_revoqMissing" "v12_cocina2" "v12_cocinaMissing"
## [46] "v18a_bici2" "v18a_bici9" "v18a_biciMissing"
## [49] "p42d_comuni2" "p42d_comuni3" "p42d_comuni4"
## [52] "p42d_comuni9" "p37_lugres52" "p37_lugres53"
## [55] "p37_lugres59" "p30b_caja2" "p30b_caja9"
## [58] "pet_199" "v07_aguapro2" "v07_aguapro3"
## [61] "v07_aguapro4" "v07_aguapro5" "v07_aguapro6"
## [64] "v07_aguapro7" "v07_aguapro8" "v07_aguapro9"
## [67] "v07_aguaproMissing" "v18c_auto2" "v18c_auto9"
## [70] "v18c_autoMissing" "v18h_calefon2" "v18h_calefon9"
## [73] "v18h_calefonMissing" "v20a_emi2" "v20a_emiMissing"
## covariats for the BIC
covariates <- names(model_both_BIC$coefficients)
covariates <- covariates[covariates != "(Intercept)"]
covariates %>% length()## [1] 30
## [1] "p40_lee2" "p40_lee9" "ocu_1d_131"
## [4] "ocu_1d_132" "ocu_1d_133" "ocu_1d_134"
## [7] "ocu_1d_135" "ocu_1d_136" "ocu_1d_137"
## [10] "ocu_1d_138" "ocu_1d_139" "ocu_1d_1397"
## [13] "ocu_1d_1399" "ocu_1d_13Missing" "p26_edad"
## [16] "v16_desague2" "v16_desague3" "v16_desague4"
## [19] "v16_desague5" "v16_desague6" "v16_desagueMissing"
## [22] "p31_afiliado2" "p31_afiliado3" "p31_afiliado4"
## [25] "p31_afiliado9" "v04_revoq2" "v04_revoqMissing"
## [28] "p25_sexo2" "v12_cocina2" "v12_cocinaMissing"